home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Plus 2000 #5
/
Amiga Plus CD - 2000 - No. 5.iso
/
Tools
/
Dev
/
lame_src
/
i386
/
choose_table.nas
next >
Wrap
Text File
|
2000-01-01
|
6KB
|
352 lines
; new count bit routine
; part of this code is origined from
; new GOGO-no-coda (1999, 2000)
; Copyright (C) 1999 shigeo
; modified by Keiichi SAKAI
%include "nasm.h"
globaldef choose_table_MMX
globaldef MMX_masking
externdef largetbl
externdef t1l
externdef table23
externdef table56
externdef tableABC
externdef tableDEF
externdef linbits32
externdef choose_table_H
segment_data
align 16
D14_14_14_14 dd 0x000E000E, 0x000E000E
D15_15_15_15 dd 0xfff0fff0, 0xfff0fff0
mul_add dd 0x00010010, 0x00010010
mul_add23 dd 0x00010003, 0x00010003
mul_add56 dd 0x00010004, 0x00010004
choose_jump_table_L:
dd table_MMX.L_case_0
dd table_MMX.L_case_1
dd table_MMX.L_case_2
dd table_MMX.L_case_3
dd table_MMX.L_case_45
dd table_MMX.L_case_45
dd table_MMX.L_case_67
dd table_MMX.L_case_67
dd table_MMX.L_case_8_15
dd table_MMX.L_case_8_15
dd table_MMX.L_case_8_15
dd table_MMX.L_case_8_15
dd table_MMX.L_case_8_15
dd table_MMX.L_case_8_15
dd table_MMX.L_case_8_15
dd table_MMX.L_case_8_15
segment_code
;
; use MMX
;
align 16
; int choose_table(int *ix, int *end, int *s)
choose_table_MMX:
mov ecx,[esp+4] ;ecx = begin
mov edx,[esp+8] ;edx = end
xor eax,eax
sub ecx,edx ;ecx = begin-end(should be minus)
pxor mm0,mm0 ;mm0=[0:0]
pxor mm1,mm1 ;mm1=[0:0]
test ecx,8
jz .lp
movq mm1,[edx+ecx]
add ecx,8
jz .exit
align 4
.lp:
movq mm4,[edx+ecx]
movq mm5,[edx+ecx+8]
add ecx,16
psubusw mm4,mm0 ; $BK\Ev$O(B dword $B$G$J$$$H$$$1$J$$$N$@$,(B
psubusw mm5,mm1 ; $B$=$s$J%3%^%s%I$O$J$$(B :-p
paddw mm0,mm4 ; $B$,(B, $B$3$3$G07$&CM$NHO0O$O(B 8191+15 $B0J2<$J$N$GLdBj$J$$(B
paddw mm1,mm5
jnz .lp
.exit:
psubusw mm1,mm0 ; $B$3$l$bK\Ev$O(B dword $B$G$J$$$H$$$1$J$$(B
paddw mm0,mm1
movq mm4,mm0
punpckhdq mm4,mm4
psubusw mm4,mm0 ; $B$3$l$bK\Ev$O(B dword $B$G$J$$$H$$$1$J$$(B
paddw mm0,mm4
movd eax,mm0
cmp eax,15
ja .with_ESC
jmp [choose_jump_table_L+eax*4]
.with_ESC1:
emms
mov ecx, [esp+12] ; *s
mov [ecx], eax
or eax,-1
ret
.with_ESC:
cmp eax, 8191+15
ja .with_ESC1
sub eax,15
push ebx
push esi
bsr eax, eax
%assign _P 4*2
movq mm5, [D15_15_15_15]
movq mm6, [D14_14_14_14]
movq mm3, [mul_add]
mov ecx, [esp+_P+4] ; = ix
; mov edx, [esp+_P+8] ; = end
sub ecx, edx
xor esi, esi ; sum = 0
pxor mm7, mm7 ; linbits_sum, 14$B$r1[$($?$b$N$N?t(B
test ecx, 8
jz .H_dual_lp1
movq mm0, [edx+ecx]
packssdw mm0,mm7
movq mm2, mm0
paddusw mm0, mm5 ; mm0 = min(ix, 15)+0xfff0
pcmpgtw mm2, mm6 ; 14$B$h$jBg$-$$$+!)(B
psubw mm7, mm2 ; 14$B$h$jBg$-$$$H$-(B linbits_sum++;
pmaddwd mm0, mm3 ; {0, 0, y, x}*{1, 16, 1, 16}
movd ebx, mm0
mov esi, [largetbl+ebx*4+(16*16+16)*4]
add ecx,8
jz .H_dual_exit
align 4
.H_dual_lp1:
movq mm0, [edx+ecx]
movq mm1, [edx+ecx+8]
packssdw mm0,mm1
movq mm2, mm0
paddusw mm0, mm5 ; mm0 = min(ix, 15)+0xfff0
pcmpgtw mm2, mm6 ; 14$B$h$jBg$-$$$+!)(B
pmaddwd mm0, mm3 ; {y, x, y, x}*{1, 16, 1, 16}
movd ebx, mm0
punpckhdq mm0,mm0
add esi, [largetbl+ebx*4+(16*16+16)*4]
movd ebx, mm0
add esi, [largetbl+ebx*4+(16*16+16)*4]
add ecx, 16
psubw mm7, mm2 ; 14$B$h$jBg$-$$$H$-(B linbits_sum++;
jnz .H_dual_lp1
.H_dual_exit:
pmov mm1,mm7
punpckhdq mm7,mm7
paddd mm7,mm1
punpckldq mm7,mm7
pmaddwd mm7, [linbits32+eax*8] ; linbits
mov ax, [choose_table_H+eax*2]
movd ecx, mm7
punpckhdq mm7,mm7
movd edx,mm7
emms
shl edx, 16
add ecx, edx
add ecx, esi
pop esi
pop ebx
mov edx, ecx
and ecx, 0xffff ; ecx = sum2
shr edx, 16 ; edx = sum
cmp edx, ecx
jle .chooseE_s1
mov edx, ecx
shr eax, 8
.chooseE_s1:
mov ecx, [esp+12] ; *s
and eax, 0xff
add [ecx], edx
ret
table_MMX.L_case_0:
emms
ret
table_MMX.L_case_1:
emms
mov eax, [esp+12] ; *s
mov ecx, [esp+4] ; *ix
sub ecx, edx
push ebx
.lp:
mov ebx, [edx+ecx]
add ebx, ebx
add ebx, [edx+ecx+4]
movzx ebx, byte [ebx+t1l]
add [eax], ebx
add ecx, 8
jnz .lp
pop ebx
mov eax, 1
ret
table_MMX.L_case_45:
push dword 7
mov ecx, tableABC+9*8
jmp from3
table_MMX.L_case_67:
push dword 10
mov ecx, tableABC
jmp from3
table_MMX.L_case_8_15:
push dword 13
mov ecx, tableDEF
from3:
mov eax,[esp+8] ;eax = *begin
; mov edx,[esp+12] ;edx = *end
push ebx
sub eax, edx
movq mm5,[mul_add]
pxor mm2,mm2 ;mm2 = sum
test eax, 8
jz .choose3_lp1
; odd length
movq mm0,[edx+eax] ;mm0 = ix[0] | ix[1]
packssdw mm0,mm2
pmaddwd mm0,mm5
movd ebx,mm0
movq mm2, [ecx+ebx*8]
add eax,8
jz .choose3_exit
align 4
.choose3_lp1
movq mm0,[edx+eax]
movq mm1,[edx+eax+8]
packssdw mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
pmaddwd mm0,mm5
movd ebx,mm0
punpckhdq mm0,mm0
paddd mm2, [ecx+ebx*8]
movd ebx,mm0
add eax,16
paddd mm2, [ecx+ebx*8]
jnz .choose3_lp1
.choose3_exit
; xor eax,eax
movd ebx, mm2
punpckhdq mm2,mm2
mov ecx, ebx
and ecx, 0xffff ; ecx = sum2
shr ebx, 16 ; ebx = sum1
movd edx, mm2 ; edx = sum
cmp edx, ebx
jle .choose3_s1
mov edx, ebx
inc eax
.choose3_s1:
emms
pop ebx
cmp edx, ecx
jle .choose3_s2
mov edx, ecx
mov eax, 2
.choose3_s2:
pop ecx
add eax, ecx
mov ecx, [esp+12] ; *s
add [ecx], edx
ret
table_MMX.L_case_2:
push dword 2
mov ecx,table23
pmov mm5,[mul_add23]
jmp from2
table_MMX.L_case_3:
push dword 5
mov ecx,table56
pmov mm5,[mul_add56]
from2:
mov eax,[esp+8] ;eax = *begin
; mov edx,[esp+12] ;edx = *end
push ebx
push edi
sub eax, edx
xor edi, edi
test eax, 8
jz .choose2_lp1
; odd length
movq mm0,[edx+eax] ;mm0 = ix[0] | ix[1]
pxor mm2,mm2 ;mm2 = sum
packssdw mm0,mm2
pmaddwd mm0,mm5
movd ebx,mm0
mov edi, [ecx+ebx*4]
add eax,8
jz .choose2_exit
align 4
.choose2_lp1
movq mm0,[edx+eax]
movq mm1,[edx+eax+8]
packssdw mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
pmaddwd mm0,mm5
movd ebx,mm0
punpckhdq mm0,mm0
add edi, [ecx+ebx*4]
movd ebx, mm0
add edi, [ecx+ebx*4]
add eax,16
jnc .choose2_lp1
.choose2_exit
mov ecx, edi
pop edi
pop ebx
pop eax ; table num.
emms
mov edx, ecx
and ecx, 0xffff ; ecx = sum2
shr edx, 16 ; edx = sum1
cmp edx, ecx
jle .choose2_s1
mov edx, ecx
inc eax
.choose2_s1:
mov ecx, [esp+12] ; *s
add [ecx], edx
ret
end